{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "问题：\n",
    "从一个大字符串，提取多个标签tag，变成普通的列\n",
    "'''\n",
    "data_dict = {\n",
    "    'tags': [\n",
    "        '#good#vibes',\n",
    "        '#hot#summer#holiday',\n",
    "        '#street#food',\n",
    "        '#workout'\n",
    "    ]\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(data_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tags</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>#good#vibes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>#hot#summer#holiday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>#street#food</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>#workout</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  tags\n",
       "0          #good#vibes\n",
       "1  #hot#summer#holiday\n",
       "2         #street#food\n",
       "3             #workout"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4 entries, 0 to 3\n",
      "Data columns (total 1 columns):\n",
      " #   Column  Non-Null Count  Dtype \n",
      "---  ------  --------------  ----- \n",
      " 0   tags    4 non-null      object\n",
      "dtypes: object(1)\n",
      "memory usage: 160.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td>good</td>\n",
       "      <td>vibes</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>hot</td>\n",
       "      <td>summer</td>\n",
       "      <td>holiday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>street</td>\n",
       "      <td>food</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td>workout</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  0        1       2        3\n",
       "0       good   vibes     None\n",
       "1        hot  summer  holiday\n",
       "2     street    food     None\n",
       "3    workout    None     None"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df[\"tags\"].str.split(\"#\", expand=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'[0] not found in axis'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[27], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[39m# 去掉无效的第0列\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m df \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39;49mdrop(columns\u001b[39m=\u001b[39;49m[\u001b[39m0\u001b[39;49m])\n\u001b[1;32m      3\u001b[0m df\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/LearningPython3.8/lib/python3.8/site-packages/pandas/util/_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    325\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(args) \u001b[39m>\u001b[39m num_allow_args:\n\u001b[1;32m    326\u001b[0m     warnings\u001b[39m.\u001b[39mwarn(\n\u001b[1;32m    327\u001b[0m         msg\u001b[39m.\u001b[39mformat(arguments\u001b[39m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[1;32m    328\u001b[0m         \u001b[39mFutureWarning\u001b[39;00m,\n\u001b[1;32m    329\u001b[0m         stacklevel\u001b[39m=\u001b[39mfind_stack_level(),\n\u001b[1;32m    330\u001b[0m     )\n\u001b[0;32m--> 331\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/LearningPython3.8/lib/python3.8/site-packages/pandas/core/frame.py:5399\u001b[0m, in \u001b[0;36mDataFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   5251\u001b[0m \u001b[39m@deprecate_nonkeyword_arguments\u001b[39m(version\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, allowed_args\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mself\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mlabels\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m   5252\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdrop\u001b[39m(  \u001b[39m# type: ignore[override]\u001b[39;00m\n\u001b[1;32m   5253\u001b[0m     \u001b[39mself\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   5260\u001b[0m     errors: IgnoreRaise \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mraise\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m   5261\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m DataFrame \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m   5262\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m   5263\u001b[0m \u001b[39m    Drop specified labels from rows or columns.\u001b[39;00m\n\u001b[1;32m   5264\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   5397\u001b[0m \u001b[39m            weight  1.0     0.8\u001b[39;00m\n\u001b[1;32m   5398\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 5399\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mdrop(\n\u001b[1;32m   5400\u001b[0m         labels\u001b[39m=\u001b[39;49mlabels,\n\u001b[1;32m   5401\u001b[0m         axis\u001b[39m=\u001b[39;49maxis,\n\u001b[1;32m   5402\u001b[0m         index\u001b[39m=\u001b[39;49mindex,\n\u001b[1;32m   5403\u001b[0m         columns\u001b[39m=\u001b[39;49mcolumns,\n\u001b[1;32m   5404\u001b[0m         level\u001b[39m=\u001b[39;49mlevel,\n\u001b[1;32m   5405\u001b[0m         inplace\u001b[39m=\u001b[39;49minplace,\n\u001b[1;32m   5406\u001b[0m         errors\u001b[39m=\u001b[39;49merrors,\n\u001b[1;32m   5407\u001b[0m     )\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/LearningPython3.8/lib/python3.8/site-packages/pandas/util/_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    325\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(args) \u001b[39m>\u001b[39m num_allow_args:\n\u001b[1;32m    326\u001b[0m     warnings\u001b[39m.\u001b[39mwarn(\n\u001b[1;32m    327\u001b[0m         msg\u001b[39m.\u001b[39mformat(arguments\u001b[39m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[1;32m    328\u001b[0m         \u001b[39mFutureWarning\u001b[39;00m,\n\u001b[1;32m    329\u001b[0m         stacklevel\u001b[39m=\u001b[39mfind_stack_level(),\n\u001b[1;32m    330\u001b[0m     )\n\u001b[0;32m--> 331\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/LearningPython3.8/lib/python3.8/site-packages/pandas/core/generic.py:4505\u001b[0m, in \u001b[0;36mNDFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   4503\u001b[0m \u001b[39mfor\u001b[39;00m axis, labels \u001b[39min\u001b[39;00m axes\u001b[39m.\u001b[39mitems():\n\u001b[1;32m   4504\u001b[0m     \u001b[39mif\u001b[39;00m labels \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 4505\u001b[0m         obj \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39;49m_drop_axis(labels, axis, level\u001b[39m=\u001b[39;49mlevel, errors\u001b[39m=\u001b[39;49merrors)\n\u001b[1;32m   4507\u001b[0m \u001b[39mif\u001b[39;00m inplace:\n\u001b[1;32m   4508\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_update_inplace(obj)\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/LearningPython3.8/lib/python3.8/site-packages/pandas/core/generic.py:4546\u001b[0m, in \u001b[0;36mNDFrame._drop_axis\u001b[0;34m(self, labels, axis, level, errors, only_slice)\u001b[0m\n\u001b[1;32m   4544\u001b[0m         new_axis \u001b[39m=\u001b[39m axis\u001b[39m.\u001b[39mdrop(labels, level\u001b[39m=\u001b[39mlevel, errors\u001b[39m=\u001b[39merrors)\n\u001b[1;32m   4545\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 4546\u001b[0m         new_axis \u001b[39m=\u001b[39m axis\u001b[39m.\u001b[39;49mdrop(labels, errors\u001b[39m=\u001b[39;49merrors)\n\u001b[1;32m   4547\u001b[0m     indexer \u001b[39m=\u001b[39m axis\u001b[39m.\u001b[39mget_indexer(new_axis)\n\u001b[1;32m   4549\u001b[0m \u001b[39m# Case for non-unique axis\u001b[39;00m\n\u001b[1;32m   4550\u001b[0m \u001b[39melse\u001b[39;00m:\n",
      "File \u001b[0;32m~/opt/anaconda3/envs/LearningPython3.8/lib/python3.8/site-packages/pandas/core/indexes/base.py:6934\u001b[0m, in \u001b[0;36mIndex.drop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m   6932\u001b[0m \u001b[39mif\u001b[39;00m mask\u001b[39m.\u001b[39many():\n\u001b[1;32m   6933\u001b[0m     \u001b[39mif\u001b[39;00m errors \u001b[39m!=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m-> 6934\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlist\u001b[39m(labels[mask])\u001b[39m}\u001b[39;00m\u001b[39m not found in axis\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m   6935\u001b[0m     indexer \u001b[39m=\u001b[39m indexer[\u001b[39m~\u001b[39mmask]\n\u001b[1;32m   6936\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdelete(indexer)\n",
      "\u001b[0;31mKeyError\u001b[0m: '[0] not found in axis'"
     ]
    }
   ],
   "source": [
    "# 去掉无效的第0列\n",
    "df = df.drop(columns=[0])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns = [\"tag1\", \"tag2\", \"tag3\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tag1</th>\n",
       "      <th>tag2</th>\n",
       "      <th>tag3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>good</td>\n",
       "      <td>vibes</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>hot</td>\n",
       "      <td>summer</td>\n",
       "      <td>holiday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>street</td>\n",
       "      <td>food</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>workout</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      tag1    tag2     tag3\n",
       "0     good   vibes     None\n",
       "1      hot  summer  holiday\n",
       "2   street    food     None\n",
       "3  workout    None     None"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "LearningPython3.8",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "513f08581c74d40101b89b87473499d0d0c77aa24200908e71193a6a8a6d36b9"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
